For this one, you really need to read the docs.
import geopandas as gpd
url = 'https://bit.ly/3neINBV'
boros = gpd.read_file(url, driver='GPKG')
boros.plot(color='none', edgecolor='red');boros['hulls'] = boros.geometry.convex_hull
boros = boros.set_geometry('hulls')
boros.plot(column='NAME', categorical=True, alpha=0.5);boros['region'] = 'London'
boros = boros.set_geometry('geometry') # Set back to original geom
ldn = boros.dissolve(by='region') # And dissolve to a single poly
f,ax = plt.subplots(figsize=(10,8)) # New plot
ldn.plot(ax=ax) # Add London layer to axisAnd some nice chaining…
Load the CSV file and convert to GeoDataFrame:
import pandas as pd

# Load the Airbnb listings sample and clean the price column:
# strip the literal '$' (regex=False) so it can be cast to float.
url = 'https://bit.ly/3I0XDrq'
df = pd.read_csv(url)
df['price'] = df.price.str.replace('$', '', regex=False).astype('float')
df.set_index('id', inplace=True)

# Promote to a GeoDataFrame: build lon/lat points in WGS84, then
# reproject to the British National Grid (EPSG:27700, metres) so that
# the distance-based weights below are in sensible units.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(
        df['longitude'], df['latitude'], crs='epsg:4326'
    )
)
gdf = gdf.to_crs('epsg:27700')

import pysal as ps
# https://github.com/pysal/mapclassify
import mapclassify as mc
# https://jiffyclub.github.io/palettable/
import palettable.matplotlib as palmpl
from legendgram import legendgram

# Quintile choropleth of price, with a 'legendgram' — a small histogram
# of the variable used in place of a conventional legend.
f, ax = plt.subplots(figsize=(10, 8))
gdf.plot(column='price', scheme='Quantiles', cmap='magma', k=5, ax=ax)
# Re-derive the same quintile breaks so the legendgram matches the map.
q = mc.Quantiles(gdf.price.array, k=5)
# https://github.com/pysal/legendgram/blob/master/legendgram/legendgram.py
legendgram(f, ax,
           gdf.price, q.bins, pal=palmpl.Magma_5,
           legend_size=(.4, .2),  # legend size in fractions of the axis
           loc='upper left',      # mpl-style legend loc
           clip=(0, 500),         # clip range of the histogram
           frameon=True)

from pysal.lib import weights
w = weights.KNN.from_dataframe(gdf, k=3)
gdf['w_price'] = weights.lag_spatial(w, gdf.price)
gdf[['name','price','w_price']].sample(5, random_state=42)| name | price | w_price | |
|---|---|---|---|
| 83 | Southfields Home | 85.0 | 263.0 |
| 53 | Flat in Islington, Central London | 55.0 | 190.0 |
| 70 | 3bedroom Family Home minutes from Kensington Tube | 221.0 | 470.0 |
| 453 | Bed, 20 min to Liverpool st, EAST LONDON | 110.0 | 186.0 |
| 44 | Avni Kensington Hotel | 430.0 | 821.0 |
w2 = weights.DistanceBand.from_dataframe(gdf, threshold=2000, alpha=-0.25)
gdf['price_std'] = (gdf.price - gdf.price.mean()) / gdf.price.std()
gdf['w_price_std'] = weights.lag_spatial(w2, gdf.price_std)
gdf[['name','price_std','w_price_std']].sample(5, random_state=42)| name | price_std | w_price_std | |
|---|---|---|---|
| 83 | Southfields Home | -0.27 | 0.00 |
| 53 | Flat in Islington, Central London | -0.51 | -0.58 |
| 70 | 3bedroom Family Home minutes from Kensington Tube | 0.83 | 0.46 |
| 453 | Bed, 20 min to Liverpool st, EAST LONDON | -0.07 | -0.82 |
| 44 | Avni Kensington Hotel | 2.52 | 3.25 |
import esda                               # used below but never imported
from splot.esda import moran_scatterplot  # ditto

# Global Moran's I for price: statistic, permutation-based pseudo
# p-value, and the Moran scatterplot.
mi = esda.Moran(gdf['price'], w)
print(f"{mi.I:0.4f}")
print(f"{mi.p_sim:0.4f}")
moran_scatterplot(mi)

# Local Moran's I (LISA) for each listing.
lisa = esda.Moran_Local(gdf.price, w)
# Break observations into significant or not
gdf['sig'] = lisa.p_sim < 0.05
# Store the quadrant they belong to
gdf['quad'] = lisa.q
gdf[['name','price','sig','quad']].sample(5, random_state=42)| name | price | sig | quad | |
|---|---|---|---|---|
| 83 | Southfields Home | 85.0 | False | 3 |
| 53 | Flat in Islington, Central London | 55.0 | False | 3 |
| 70 | 3bedroom Family Home minutes from Kensington Tube | 221.0 | False | 1 |
| 453 | Bed, 20 min to Liverpool st, EAST LONDON | 110.0 | False | 3 |
| 44 | Avni Kensington Hotel | 430.0 | False | 1 |
There’s so much more to find, but:
Exploratory Spatial Data Analysis • Jon Reades